#function for percentiles and standardization
perc.rank <- function(x) {
  y<-rank(x)/length(x)
  return(y)}
std<-function(x) (x-mean(x,na.rm=T))/sd(x,na.rm=T)

#compute student component percentiles
data_student_teacher_expenditure<-data_student_teacher_exp_raw
data_student_teacher_expenditure$mandatory.student<-NA
data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$disciplina_obligatorie_scris_final),]<-data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$disciplina_obligatorie_scris_final),] %>%
  group_by(an,mandatory_subject_student) %>%
  mutate(mandatory.student=perc.rank(disciplina_obligatorie_scris_final))

data_student_teacher_expenditure$elective.student<-NA
data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$disciplina_alegere_aria_culiculara_final),]<-data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$disciplina_alegere_aria_culiculara_final),] %>%
  group_by(an,elective_subject_student) %>%
  mutate(elective.student=perc.rank(disciplina_alegere_aria_culiculara_final))

data_student_teacher_expenditure$romanian.student<-NA
data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$lb_romana_final),]<-data_student_teacher_expenditure[!is.na(data_student_teacher_expenditure$lb_romana_final),] %>%
  group_by(an) %>%
  mutate(romanian.student=perc.rank(lb_romana_final))



#split data by subject
data_student_teacher_expenditure_long<-data_student_teacher_expenditure %>%
  ungroup() %>%
  mutate(id=seq.int(nrow(data_student_teacher_expenditure))) %>%
  rename(romanian.teacher=teacher_perc.ro,
         elective.teacher=teacher_perc.elect,
         mandatory.teacher=teacher_perc.mand,
         romanian.gpa=gpa_perc.ro,
         elective.gpa=gpa_perc.elect,
         mandatory.gpa=gpa_perc.mand,
         romanian.Experience=Experience.ro,
         elective.Experience=Experience.elect,
         mandatory.Experience=Experience.mand,
         romanian.Education=Education_num.ro,
         elective.Education=Education_num.elect,
         mandatory.Education=Education_num.mand,
         romanian.Teacher_Category_num=Teacher_Category_num.ro,
         elective.Teacher_Category_num=Teacher_Category_num.elect,
         mandatory.Teacher_Category_num=Teacher_Category_num.mand) %>%
  select(id_bac,id_adm,n_hs_town_group,grad_perc,entrance_perc,
         elective_subject_student,mandatory_subject_student,
         school_harmonized,unitate_de_invatamant,liceu_repartizat,scoala_de_provenienta,
         an,judet_bac,town,town_hs_bac,id,dec,quart_town,dec_town,n_hs_town_group,
         class_mean,school_mean,class_mean_yr,school_mean_yr,
         n_students_hs_yr,specializare_bac,specializare_bac2,
         entrance_perc_math,entrance_perc_ro,entrance_perc_exam,entrance_perc_gpa,
         media,media_la_admitere,n_students_town_yr,
         Cod_SIIIR_hs_bac,Cod_SIRUTA_hs_bac,
         "romanian.student","romanian.teacher",romanian.gpa,romanian.Experience,romanian.Teacher_Category_num,
         #"romanian.class_mean","romanian.class_mean_yr","romanian.school_mean","romanian.school_mean_yr",
         "elective.student","elective.teacher",elective.gpa,elective.Experience,elective.Teacher_Category_num,
         #"elective.class_mean","elective.class_mean_yr","elective.school_mean","elective.school_mean_yr",
         "mandatory.student","mandatory.teacher",mandatory.gpa,mandatory.Experience,mandatory.Teacher_Category_num,
         romanian.Education,elective.Education,mandatory.Education
         #"mandatory.class_mean","mandatory.class_mean_yr","mandatory.school_mean","mandatory.school_mean_yr",
  )
data_student_teacher_expenditure_long<-as.data.frame(data_student_teacher_expenditure_long)
data_student_teacher_expenditure_long<-reshape(data_student_teacher_expenditure_long,
                                               direction='long',
                                               varying=list(student=c("romanian.student","elective.student","mandatory.student"),
                                                            teacher=c("romanian.teacher","elective.teacher","mandatory.teacher"),
                                                            gpa=c("romanian.gpa","elective.gpa","mandatory.gpa"),
                                                            Experience=c("romanian.Experience","elective.Experience","mandatory.Experience"),
                                                            Teacher_Category_num=c("romanian.Teacher_Category_num","elective.Teacher_Category_num","mandatory.Teacher_Category_num"),
                                                            Education_num=c("romanian.Education","elective.Education","mandatory.Education")),
                                               timevar='subject',
                                               times=c('romanian', 'elective','mandatory'),
                                               v.names=c('student', 'teacher_test','teacher_gpa','teacher_exp','teacher_rank','teacher_education'),
                                               #,"class_mean","school_mean","class_mean_yr","school_mean_yr"),
                                               idvar="id")









#add mandatory or elective component
data_student_teacher_expenditure_long<-data_student_teacher_expenditure_long %>%
  mutate(subject2=ifelse(subject=='romanian','romanian','Other')) %>%
  mutate(subject2=ifelse(subject=='elective',elective_subject_student,subject2)) %>%
  mutate(subject2=ifelse(subject=='mandatory',mandatory_subject_student,subject2))
data_student_teacher_expenditure_long<-data_student_teacher_expenditure_long %>%
  mutate(subject2=ifelse(subject=='elective' & is.na(subject2),'Other Elective',subject2)) %>%
  mutate(subject2=ifelse(subject=='mandatory' & is.na(subject2),'Other Mandatory',subject2)) %>%
  mutate(subject2=ifelse(subject=='mandatory',mandatory_subject_student,subject2))

data_student_teacher_expenditure_long<-data_student_teacher_expenditure_long %>% 
  mutate(quart_teacher=cut(teacher_test, breaks = c(-Inf, 0.25,0.5,0.75, Inf), 
                           labels = c('1','2','3','4'), right = FALSE))

#get school/track mean over time using harmoized school/track codes
data_student_teacher_expenditure_long<-data_student_teacher_expenditure_long %>%
  group_by(school_harmonized) %>%
  mutate(school_mean_math=mean(entrance_perc_math,na.rm=T),
         school_mean_ro=mean(entrance_perc_ro,na.rm=T)) %>%
  group_by(school_harmonized,an) %>%
  mutate(school_mean_math_yr=mean(entrance_perc_math,na.rm=T),
         school_mean_ro_yr=mean(entrance_perc_ro,na.rm=T))

data_reg<-data_student_teacher_expenditure_long


#standardize variables
data_student_teacher_expenditure_long<-data_student_teacher_expenditure_long %>%
  mutate(teacher_education=std(teacher_education),
         teacher_rank=std(teacher_rank),
         teacher_gpa=std(teacher_gpa),
         teacher_exp=std(teacher_exp))

#regressions
baseline_school<-feols(I(student*100)~I(entrance_perc_ro*100)+I(entrance_perc_math*100)+I(teacher_test*100)+teacher_gpa+teacher_exp+teacher_rank+teacher_education|
                         specializare_bac2+elective_subject_student+mandatory_subject_student+
                         as.factor(an)+town+scoala_de_provenienta|
                         I(school_mean*100)~dec_town*n_hs_town_group,
                       cluster=~town,
                       data=data_student_teacher_expenditure_long)
summary(baseline_school)
r2(baseline_school)

baseline_school_subject<-feols(I(student*100)~I(entrance_perc_ro*100)+
                                 I(entrance_perc_math*100)+
                                 I(teacher_test*100)*elective_subject_student+
                                 teacher_gpa+teacher_exp+teacher_rank+
                                 teacher_education|
                                 specializare_bac2+elective_subject_student+mandatory_subject_student+
                                 as.factor(an)+town+scoala_de_provenienta|
                                 I(school_mean*100)~dec_town*n_hs_town_group,
                               cluster=~town,
                               data=data_student_teacher_expenditure_long)
summary(baseline_school_subject)
r2(baseline_school_subject)






baseline_school_ability<-feols(I(student*100)~I(entrance_perc_ro*100)*I(teacher_test*100)+I(entrance_perc_math*100)*I(teacher_test*100)+I(teacher_test*100)+teacher_gpa+teacher_exp+teacher_rank+teacher_education|
                                 specializare_bac2+elective_subject_student+mandatory_subject_student+
                                 as.factor(an)+town+scoala_de_provenienta|
                                 I(school_mean*100)~dec_town*n_hs_town_group,
                               cluster=~town,
                               data=data_student_teacher_expenditure_long )
summary(baseline_school_ability)
r2(baseline_school_ability)






#expenditures
# setwd(wd_data_final)
# data_exp<-readRDS('data_student_expenditure_anon')
data_exp<-data_student_exp_raw
data_exp<-data_exp %>%
  group_by(an_adm,liceu_repartizat) %>%
  mutate(Exp_stud=Expenditure/n())

model_exp_results<-feols(I(grad_perc*100)~I(Exp_stud/1000)+I(entrance_perc*100)+dec_town+n_hs_town_group+dec_town*n_students_town_yr+Unemployment_hs_bac*dec_town+Wages_hs_bac*dec_town+drop_hs_hs_bac*dec_town|
                   as.factor(an)+specializare_bac2+scoala_de_provenienta+town|
                   I(school_mean*100)~dec_town*n_hs_town_group,
                 cluster=~judet_bac+town,
                 data=data_exp %>% filter(school_change==F))
summary(model_exp_results)



#Tables
#write results
options(modelsummary_format_numeric_latex = "plain")
f_big<-function(x) format(x, big.mark=",", scientific=FALSE, nsmall=1,digits=1)
f <- function(x) format(round(x/1000000,1) , nsmall = 1)

current_path<-rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
fileConn<-file("09_table_07.txt")
writeLines(
  print(modelsummary(list("Baseline"=baseline_school,
                          #"Ability"=baseline_school_ability,
                          #"Subject"=baseline_school_subject,
                          "Expenditure"=model_exp_results
  ),
  statistic = "std.error",
  estimate="{estimate}{stars}",
  stars=c('$^{*}$'=0.1,'$^{**}$'=0.05,'$^{***}$'=0.01),
  gof_map=list(list("raw" = "nobs", "clean" = "N", "fmt" = f_big),
               list("raw" = "r.squared", "clean" = "R$^2$",fmt="%.2f")),
  metrics="R2",
  output="latex",
  escape=F)
  ), fileConn)
close(fileConn)




#subject-specif teacher table in appendix
f_big<-function(x) format(x, big.mark=",", scientific=FALSE, nsmall=1,digits=1)
f <- function(x) format(round(x/1000000,1) , nsmall = 1)
options(modelsummary_format_numeric_latex = "plain")
print(modelsummary(list(
                        "Ability"=baseline_school_ability,
                        "Subject"=baseline_school_subject
),
statistic = "std.error",
estimate="{estimate}{stars}",
stars=c('$^{*}$'=0.1,'$^{**}$'=0.05,'$^{***}$'=0.01),
gof_map=list(list("raw" = "nobs", "clean" = "N", "fmt" = f_big),
             list("raw" = "r.squared", "clean" = "R$^2$",fmt="%.2f")),
metrics="R2",
output="latex",
fmt=3,
escape=F))

#teacher placement score is best predictor - appendix
placement_score_best<-feols(I(student*100)~I(entrance_perc_ro*100)+I(entrance_perc_math*100)+I(teacher_test*100)+teacher_gpa+teacher_exp+teacher_rank+teacher_education+I(school_mean*100)|
                              specializare_bac2+elective_subject_student+mandatory_subject_student+
                              as.factor(an)+town+scoala_de_provenienta,
                            cluster=~town,
                            data=data_student_teacher_expenditure_long )
summary(placement_score_best)

options(modelsummary_format_numeric_latex = "plain")
print(modelsummary(list("Baseline"=placement_score_best),
                   statistic = "std.error",
                   estimate="{estimate}{stars}",
                   stars=c('$^{*}$'=0.1,'$^{**}$'=0.05,'$^{***}$'=0.01),
                   gof_map=list(list("raw" = "nobs", "clean" = "N", "fmt" = f_big),
                                list("raw" = "r.squared", "clean" = "R$^2$",fmt="%.2f")),
                   metrics="R2",
                   output="latex",
                   escape=F))




